* Builds ACT_BoundaryALL_Final.dta from the six ACT-Boundary files and
* pcode_data_2001, all of which are read from the working directory set below.
* Start with no dataset in memory.
clear
****************************************************************************
* Replace this next line with the directory where you've saved the files
****************************************************************************
*cd "C:\Documents and Settings\Ian\My Documents\PAE - Schools\Data\Publish"
cd "C:\Documents and Settings\Andrew Leigh\My publications\Aust - house prices & school quality\For Replication"

* From here on every command (and every comment) ends at a semicolon
* instead of at the end of the line.
#delimit ;

***ASSEMBLE DATA ;

**PREDICT NO OF BEDROOMS where bedroom info is not provided (for robustness check);

* For each of the six boundary files: regress bedrooms on block size, store the ;
* fitted value in beds_hat, overwrite it with the recorded bedroom count wherever ;
* one exists, and write the result to temp1 through temp6 ;

forvalues i = 1/6 { ;
    use ACT-Boundary`i', clear ;
    xi: reg beds blksize ;
    * capture - do not abort when a file carries no earlier beds_hat ;
    capture drop beds_hat ;
    predict beds_hat ;
    * Recorded bedroom counts take precedence over fitted values ;
    replace beds_hat=beds if beds~=. ;
    * replace - lets the script be re-run after an interrupted run left temp files behind ;
    save temp`i', replace ;
} ;


**MERGE DATA ;

* The old-syntax match merge below needs every using file to be sorted by the ;
* key on disk, so each sorted file is saved back before the merge ;
forvalues i = 2/6 { ;
    use temp`i', clear ;
    sort address ;
    save temp`i', replace ;
} ;

* temp1 is the master dataset and only needs to be sorted in memory ;
use temp1, clear ;
sort address ;
merge address using temp2 temp3 temp4 temp5 temp6 ; 

* With several using files the merge creates one indicator per file plus an ;
* overall _merge and none of them is needed afterwards ;
drop _merge1 _merge2 _merge3 _merge4 _merge5 _merge ;

* Convert beds_hat to a whole number of bedrooms, bottom-coded at 2 and ;
* top-coded at 7. Rounding covers the whole real line, so fitted values such ;
* as 2.495 or 3.49995 are no longer left as non-integers. Missing values are ;
* skipped explicitly because min() and max() ignore missing arguments ;
replace beds_hat = min(max(round(beds_hat), 2), 7) if !missing(beds_hat) ;


**GENERATE SALE DATES ;

* Parse the string variable date (month-day-year order) into a Stata daily date ;
* NOTE(review): recent Stata manuals write the date() mask in upper case - confirm ;
* that the lower-case mask is parsed by the Stata version in use ;
gen sdate = date(date, "mdy") ;
format sdate %d ;

* Calendar components of the sale date ;
generate saleyear  = year(sdate) ;
generate salemonth = month(sdate) ;
generate saleqtr   = quarter(sdate) ;

* Year and quarter packed into one number, e.g. 2004 quarter 3 becomes 20043 ;
generate saleqy = saleqtr + (10*saleyear) ;


**ASSIGN TEST SCORE TO SCHOOLS ;

* uai is the test score of the school named in school for the year of sale. ;
* One statement per school - the nested cond() picks the 2005, 2004 or 2003 ;
* value, and the inlist() guard leaves uai missing for any other sale year ;

gen uai=. ;

replace uai=cond(saleyear==2005, 84.6, cond(saleyear==2004, 75.8, 80.35))
    if school=="Copeland" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 73.3, cond(saleyear==2004, 74.5, 77.05))
    if school=="Canberra" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 77.3, cond(saleyear==2004, 80.5, 67.5))
    if school=="Dickson" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 72.6, cond(saleyear==2004, 74.5, 73.02))
    if school=="Erindale" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 71.25, cond(saleyear==2004, 77, 72.7))
    if school=="Hawker" & inlist(saleyear, 2003, 2004, 2005) ;

* NOTE(review): the 2004 and 2003 values for Lginn are identical - confirm against the source table ;
replace uai=cond(saleyear==2005, 66.8, cond(saleyear==2004, 69.7, 69.7))
    if school=="Lginn" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 77.3, cond(saleyear==2004, 68.5, 72.85))
    if school=="Ltugg" & inlist(saleyear, 2003, 2004, 2005) ;

replace uai=cond(saleyear==2005, 83.3, cond(saleyear==2004, 85.6, 85.25))
    if school=="Narrabundah" & inlist(saleyear, 2003, 2004, 2005) ;


**ADD CENSUS DATA ;

* Map each suburb to its postcode. Postcode 0 marks a suburb with no mapping ;
* and those rows are removed by the keep at the end of this section ;
generate postcode=0 ;
replace postcode=2602 if suburb=="Lyneham" ;
replace postcode=2603 if suburb=="Red Hill" ;
replace postcode=2605 if suburb=="Garran" ;
* Weetangera is 2614 like its neighbours Macquarie and Scullin. It was ;
* previously coded 2905, which attached the wrong postcode data to it ;
replace postcode=2614 if inlist(suburb, "Macquarie", "Scullin", "Weetangera") ;
* Florely and Chisolm are the spellings originally coded here - the standard ;
* spellings Florey and Chisholm are accepted too so such rows are not lost ;
replace postcode=2615 if inlist(suburb, "Florely", "Florey", "Higgins", "Holt", "Latham", "Macgregor") ;
replace postcode=2617 if inlist(suburb, "Evatt", "Kaleen", "Mckellar") ;
replace postcode=2904 if suburb=="Monash" ;
replace postcode=2905 if inlist(suburb, "Chisolm", "Chisholm", "Isabella Plains", "Richardson") ;

* Old-syntax match merge on postcode with pcode_data_2001 as master and the ;
* house-level file temp7 as using - both sides are sorted by the key first ;
sort postcode ;
save temp7, replace ;
use pcode_data_2001 ;
sort postcode ;
merge postcode using temp7 ;

* Keep only the eight postcodes assigned above. This drops rows of ;
* pcode_data_2001 for other postcodes and houses left at postcode 0 ;
keep if inlist(postcode, 2602, 2603, 2605, 2614, 2615, 2617, 2904, 2905) ;

** LOG KEY VARIABLES ;

* Natural logs of price, school score and block size (log() is Stata's synonym for ln()) ;
generate logprice   = log(price) ;
generate loguai     = log(uai) ;
generate logblksize = log(blksize) ;

* Square and cube of block size in levels ;
generate blksize_sqd = blksize^2 ;
generate blksize_cbd = blksize^3 ;

* Square and cube of log block size ;
generate logblksize_squd = log(blksize)^2 ;
generate logblksize_cbd  = log(blksize)^3 ;

**COMBINE BATH AND ENSUITE ;

* Keep the raw bathroom count under a new name so bath can hold the combined total ;
rename bath bathrooms ;

* bath is bathrooms plus ensuites. When that sum is missing because one ;
* component is missing, fall back to bathrooms, or to ens when bathrooms is ;
* itself missing ;
generate bath = bathrooms + ens ;
replace bath = cond(bathrooms==., ens, bathrooms) if bath==. ;


**WEIGHT BOUNDARIES (for robustness check);

* Remove any temp variable carried in from the input files. capture keeps the ;
* script running when no such variable exists ;
capture drop temp ;

* weight starts as the number of non-missing prices within each boundary ;
bysort boundary: egen weight=count(price);
* temp is the number of non-missing prices in the whole sample ;
egen temp=count(price);
* Final weight is the sample total divided by the boundary count, so ;
* observations in boundaries with fewer sales receive larger weights ;
replace weight=temp/weight ;


**TIDY ;

* Remove variables that are not wanted in the final dataset ;
drop blk sec ucv addate links ;

* Write the assembled dataset, overwriting any earlier copy ;
save ACT_BoundaryALL_Final, replace ;

* Delete the seven intermediate files created earlier in this script ;
forvalues k = 1/7 { ;
    erase temp`k'.dta ;
} ;



























